per_read_modified_base_calls.txt but with binary methylation calls using the default cut-offs, i.e., filter out probabilities >20% and <80%, like the bedMethyl files but not aggregated.# see here (https://nanoporetech.github.io/megalodon/file_formats.html) for variable names
library(data.table)
binaryCalls <- fread(file = "/Users/koenvandenberge/data/molly/megalodon_output_06/per_read_modified_base_calls_binary.txt.gz",
nrows = 1e5)
colnames(binaryCalls) <- c("readID", "chr", "strand", "pos",
"mod_log_prob", "can_log_prob", "mod_base")
binaryCalls$methylation <- ifelse(binaryCalls$mod_log_prob > -0.2231436, 1, 0)
table(binaryCalls$methylation)
##
## 0 1
## 89988 10012
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✔ ggplot2 3.3.2 ✔ purrr 0.3.3
## ✔ tibble 2.1.3 ✔ dplyr 1.0.3
## ✔ tidyr 1.0.2 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between() masks data.table::between()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks data.table::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks data.table::last()
## ✖ purrr::transpose() masks data.table::transpose()
readCalls <- binaryCalls %>%
group_by(readID) %>%
summarize(avgMeth = mean(methylation),
length = n())
hist(readCalls$avgMeth, breaks=20)
logit <- function(x) log(x / (1-x))
plot(x=readCalls$length, y=logit(readCalls$avgMeth), log="x",
xlab = "Number of bases with confident methylation calls in a read",
ylab = "Logit average methylation")
Below we look at autocorrelation for random reads. It is clear that there is strong autocorrelation. This is very obvious when comparing the autocorrelation function of the observed data versus that of permuted reads.
Note the interesting observation that the ACF function appears bimodal. Clould this be related to nucleosome positions? Since the spacing is unequal across reads, does the multi-modality relate to a particular genomic distance?
rafalib::mypar(mfrow=c(4,4),
mar=c(1,2.5,3.2,1))
#for(rr in 1:27){
for(rr in 100:127){
curAvgMeth <- readCalls$avgMeth[rr]
if(curAvgMeth >0 & curAvgMeth < 1){
rid <- readCalls$readID[rr]
curDf <- binaryCalls[binaryCalls$readID == rid,]
curRange <- range(curDf$pos)
curChr <- unique(curDf$chr)
acf(curDf$methylation, ylim=c(0,1), xlim=c(0,30),
main=paste0("chr", curChr,": ", round(curRange[1]/1e3, digits = 2),
" - ", round(curRange[2]/1e3, digits = 2), "kb"))
} else next
}
rafalib::mypar(mfrow=c(4,4),
mar=c(1,2.5,3.2,1))
#for(rr in 1:27){
for(rr in 100:127){
curAvgMeth <- readCalls$avgMeth[rr]
if(curAvgMeth >0 & curAvgMeth < 1){
rid <- readCalls$readID[rr]
curDf <- binaryCalls[binaryCalls$readID == rid,]
curRange <- range(curDf$pos)
curChr <- unique(curDf$chr)
y <- curDf$methylation
y <- y - mean(y)
maxLag <- 30
n <- length(y)
acf <- c()
for(ll in 1:maxLag){
acf[ll] <- cor(y[1:(n-ll)], y[(ll+1):n])
}
plot(x=1:maxLag, y=acf, type='h', ylim=c(0,1),
main=paste0("chr", curChr,": ", round(curRange[1]/1e3, digits = 2),
" - ", round(curRange[2]/1e3, digits = 2), "kb"))
} else next
}
## Warning in cor(y[1:(n - ll)], y[(ll + 1):n]): the standard deviation is zero
## Warning in cor(y[1:(n - ll)], y[(ll + 1):n]): the standard deviation is zero
## Warning in cor(y[1:(n - ll)], y[(ll + 1):n]): the standard deviation is zero
## Warning in cor(y[1:(n - ll)], y[(ll + 1):n]): the standard deviation is zero
## Warning in cor(y[1:(n - ll)], y[(ll + 1):n]): the standard deviation is zero
## Warning in cor(y[1:(n - ll)], y[(ll + 1):n]): the standard deviation is zero
## Warning in cor(y[1:(n - ll)], y[(ll + 1):n]): the standard deviation is zero
## Warning in cor(y[1:(n - ll)], y[(ll + 1):n]): the standard deviation is zero
## Warning in cor(y[1:(n - ll)], y[(ll + 1):n]): the standard deviation is zero
## Warning in cor(y[1:(n - ll)], y[(ll + 1):n]): the standard deviation is zero
## Warning in cor(y[1:(n - ll)], y[(ll + 1):n]): the standard deviation is zero
## Warning in cor(y[1:(n - ll)], y[(ll + 1):n]): the standard deviation is zero
## Warning in cor(y[1:(n - ll)], y[(ll + 1):n]): the standard deviation is zero
## Warning in cor(y[1:(n - ll)], y[(ll + 1):n]): the standard deviation is zero
## Warning in cor(y[1:(n - ll)], y[(ll + 1):n]): the standard deviation is zero
## Warning in cor(y[1:(n - ll)], y[(ll + 1):n]): the standard deviation is zero
rafalib::mypar(mfrow=c(4,4))
for(rr in 1:27){
curAvgMeth <- readCalls$avgMeth[rr]
if(curAvgMeth >0 & curAvgMeth < 1){
acf(sample(binaryCalls$methylation[binaryCalls$readID == readCalls$readID[rr]]),
ylim=c(0,1))
} else next
}
There is high autocorrelation on the methylation signal on single reads, at the level of single methylated adenine nucleotides. Note that, this could however be expected. Indeed, Molly suggested that the actual biologically relevant resolution might be the resolution of linker regions inbetween nucleosomes. In order to identify these linker regions, we could use the nucleosome occupancy data from Chereji et al. (2018) and, for example, set a threshold on when a region is ‘not occupied by nucleosomes’, and then calculating autocorrelation between those regions.
CHECK: Does conditioning matter? Does correlation change when looking at (a) a position close to the silencer regions with a position further out in the HMR locus, versus (b) a position in the middle of the HMR locus with a position close to the silencer region? It could be that, for example, if the position close to the silencer region is methylated, there’s a reasonable probability that a position in the middle of the locus is also methylated. However, the reverse may be less clear. It’s possible that if the position in the middle of the locus is not methylated, the position close to the silencer region will never be methylated, for example.
meth <- fread("~/data/molly/modified_bases.6mA.bed")
colnames(meth) <- c("chrom", "start", "end", "name", "score",
"strand", "startCodon", "stopCodon", "color",
"coverage", "percentage")
meth_cols <- c("chrom", "start", "coverage", "percentage")
filtered_meth <- meth[coverage > 10, ..meth_cols]
nucs <- fread("~/data/molly/Chereji_Occupancy_rep1_singlebp.bedGraph")
colnames(nucs) <- c("chrom", "start", "end", "occupancy")
#plot percentage methylation as a column chart
HMR <- filtered_meth[chrom == "III" & start > 291e3 & start < 294e3]
HMR_nucs <- nucs[chrom == "chrIII" & start > 291e3 & start < 294e3]
ggplot() +
geom_col(HMR, mapping = aes(x = start, y = percentage), color = "mediumpurple4") +
geom_col(HMR_nucs, mapping = aes(x = start, y = occupancy * 20), color = "black") +
theme_minimal() +
labs(title = "SIR3-EcoGII (JRY13027)",
x = "position on chr III",
y = "% methylated reads")
plot(x=HMR$start, y=rep(-3,3,length.out=nrow(HMR)),
type="n", ylim=c(-3,3),
ylab="Scaled average methylation / occupancy",
xlab="Chromosome position")
# Methylation
loMethyl <- loess(percentage/100 ~ start, data=HMR, weights=HMR$coverage, enp.target = 50)
lines(x=loMethyl$x[order(loMethyl$x)], y=scale(loMethyl$fitted[order(loMethyl$x)]),
col="orange", lwd=4)
# Nucleosome occupancy
loOccup <- loess(occupancy/max(occupancy) ~ start, data=HMR_nucs, enp.target = 50)
lines(x=loOccup$x, y=scale(loOccup$fitted), col="darkseagreen3", lwd=4)
legend("topleft", c("Methylation", "Nucleosome occupancy"),
col=c("orange", "darkseagreen3"), lty=1, cex=1,
bty='n', lwd=4)
# Set arbitrary threshold for defining linnker regions
plot(HMR_nucs$start, HMR_nucs$occupancy, pch=16, cex=1/3)
abline(h=0.5, lty=2, col="red")
# Aggregate methylation calls at the level of linker regions
FtoT <- which(diff(HMR_nucs$occupancy < 1/2) == 1)
TtoF <- which(diff(HMR_nucs$occupancy < 1/2) == -1)
dfLink <- data.frame(start = FtoT,
end = TtoF)
# only keep linker regions with at least 5bp in size
dfLink <- dfLink[(dfLink$end - dfLink$start) >=5,]
plot(HMR_nucs$start, HMR_nucs$occupancy, pch=16, cex=1/3)
abline(h=0.5, lty=2, col="red")
abline(v=HMR_nucs$start[dfLink$start], col="blue")
abline(v=HMR_nucs$start[dfLink$end], col="green")
binaryCalls <- fread(file = "/Users/koenvandenberge/data/molly/megalodon_output_06/per_read_modified_base_calls_binary.txt.gz",
nrows = 6e6, skip=11e6)
colnames(binaryCalls) <- c("readID", "chr", "strand", "pos",
"mod_log_prob", "can_log_prob", "mod_base")
table(binaryCalls$chr)
##
## II III IV
## 1606661 3632511 760828
binaryCalls$methylation <- ifelse(binaryCalls$mod_log_prob > -0.2231436, 1, 0)
binaryCalls3 <- binaryCalls[binaryCalls$chr == "III",]
## organize reads
readInfo <- binaryCalls3 %>%
group_by(readID) %>%
summarize(start=min(pos),
end=max(pos),
chr=unique(chr))
startLink <- HMR_nucs$start[dfLink$start[1]]
endLink <- HMR_nucs$start[dfLink$end[nrow(dfLink)]]
# overlappingReads <- readInfo[(readInfo$end >= 291e3 & readInfo$start <=294e3) | (readInfo$start < 294e3 & readInfo$start > 290e3),]
overlappingReads <- readInfo[(readInfo$end >= 291e3 & readInfo$start <=294e3),]
readAvMethList <- list()
for(rr in 1:nrow(overlappingReads)){
curData <- binaryCalls3[binaryCalls3$readID == overlappingReads$readID[rr],]
avMethLinkers <- c()
for(bb in 1:nrow(dfLink)){
curStart <- HMR_nucs$start[dfLink$start[bb]]
curEnd <- HMR_nucs$end[dfLink$end[bb]]
curId <- curData$pos >= curStart & curData$pos <= curEnd
if(sum(curId) > 0){
curBinData <- curData[curData$pos >= curStart & curData$pos <= curEnd,]
avMethLinkers[bb] <- mean(curBinData$methylation)
names(avMethLinkers)[bb] <- nrow(curBinData) #coverage
} else {
avMethLinkers[bb] <- NA
}
}
readAvMethList[[rr]] <- avMethLinkers
}
avMethMat <- do.call(rbind, readAvMethList)
colSums(is.na(avMethMat)) # often no data in defined linker regions
## <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> 1 1
## 34 19 37 21 54 86 27 31 28 27 49 42
# focus on 24 reads with data in all linker regions
readAvMethListComplete <- readAvMethList[rowSums(is.na(avMethMat)) == 0]
rafalib::mypar(mfrow=c(2,2))
for(ll in 1:length(readAvMethListComplete)){
y <- readAvMethListComplete[[ll]]
if(var(y)>0){
acf(y)
}
}
# aggregate acf across all reads
acfMat <- matrix(NA, nrow=length(readAvMethListComplete),
ncol=11)
rafalib::mypar(mfrow=c(1,1))
for(ll in 1:length(readAvMethListComplete)){
y <- readAvMethListComplete[[ll]]
if(var(y)>0){
acfMat[ll,] <- acf(y, plot=FALSE)$acf[1:11,1,1]
}
}
plot(x=0:10, y=colMeans(acfMat), type='h',
xlab="Lag", ylab="Average ACF",
main="ACF averaged across all complete reads")
abline(h=0, lty=2)
# # all reads
# rafalib::mypar(mfrow=c(4,4))
# for(ll in 1:length(readAvMethList)){
# y <- readAvMethList[[ll]]
# y <- y[!is.na(y)]
# if(length(y) > 3 & var(y)>0){
# acf(y)
# }
# }
nucDat <- openxlsx::read.xlsx("../../data/HMR_HML_nucleosome positions.xlsx")
nucDatHMR <- nucDat[nucDat$region == "HMR",]
overlappingReadsDyad <- readInfo[(readInfo$end >= min(nucDatHMR$dyad) & readInfo$start <= max(nucDatHMR$dyad)),]
dfLinkDyad <- data.frame(start = nucDatHMR$dyad[-nrow(nucDatHMR)]+1,
stop = nucDatHMR$dyad[2:nrow(nucDatHMR)])
readAvMethListDyad <- list()
for(rr in 1:nrow(overlappingReadsDyad)){
curData <- binaryCalls3[binaryCalls3$readID == overlappingReadsDyad$readID[rr],]
avMethLinkers <- c()
for(bb in 1:nrow(dfLinkDyad)){
curStart <- dfLinkDyad$start[bb]
curEnd <- dfLinkDyad$stop[bb]
curId <- curData$pos >= curStart & curData$pos <= curEnd
if(sum(curId) > 0){
curBinData <- curData[curData$pos >= curStart & curData$pos <= curEnd,]
avMethLinkers[bb] <- mean(curBinData$methylation)
names(avMethLinkers)[bb] <- nrow(curBinData) #coverage
} else {
avMethLinkers[bb] <- NA
}
}
readAvMethListDyad[[rr]] <- avMethLinkers
}
avMethMatDyad <- do.call(rbind, readAvMethListDyad)
colSums(is.na(avMethMatDyad)) # often no data in defined linker regions
## <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> 1 37 24 23 30 38
## 57 67 68 67 76 72 74 76 75 78 78 77 79 78 78 79
## 2
## 78 79 70 73 72 63 63 61 63 68 70 66 69
# focus on 62 reads with data in all linker regions
readAvMethListDyadComplete <- readAvMethListDyad[rowSums(is.na(avMethMatDyad)) == 0]
rafalib::mypar(mfrow=c(2,2))
for(ll in 1:length(readAvMethListDyadComplete)){
y <- readAvMethListDyadComplete[[ll]]
if(var(y)>0){
acf(y)
}
}
# aggregate acf across all reads
acfMat <- matrix(NA, nrow=length(readAvMethListDyadComplete),
ncol=15)
rafalib::mypar(mfrow=c(1,1))
for(ll in 1:length(readAvMethListDyadComplete)){
y <- readAvMethListDyadComplete[[ll]]
if(var(y)>0){
acfMat[ll,] <- acf(y, plot=FALSE)$acf[1:15,1,1]
}
}
plot(x=0:14, y=colMeans(acfMat), type='h',
xlab="Lag", ylab="Average ACF",
main="ACF averaged across all complete reads")
abline(h=0, lty=2)
# when permuting the data
acfMatPermuted <- matrix(NA, nrow=length(readAvMethListDyadComplete),
ncol=15)
rafalib::mypar(mfrow=c(2,2))
for(ll in 1:length(readAvMethListDyadComplete)){
y <- readAvMethListDyadComplete[[ll]]
if(var(y)>0){
acfMatPermuted[ll,] <- acf(sample(y), plot=FALSE)$acf[1:15,1,1]
}
}
plot(x=0:14, y=colMeans(acfMatPermuted), type='h',
xlab="Lag", ylab="Average ACF",
main="Permuted: ACF averaged across all complete reads")
abline(h=0, lty=2)
Note that, as the lag of the autocorrelation increases, we are looking at ever shorter sequences that reside at the extremes of the HMR region. In the example above, we have a total of \(29\) linker regions which we identified using the dyad positions. However, if the outermost dyads are actually outside the HMR region, which is unmethylated, but the read within HMR is methylated, then this is what could have caused the negative autocorrelations we’re observing above.
Below, we therefore restrict to only looking within the HMR region.
nucDat <- openxlsx::read.xlsx("../../data/HMR_HML_nucleosome positions.xlsx")
nucDatHMR <- nucDat[nucDat$region == "HMR",]
## only look at linker regions within HMR silencer regions
## HMR E is at III:292674-292769
## HMR I is at III:294805-294864
nucDatHMR <- nucDatHMR[nucDatHMR$Start >= 292674,]
nucDatHMR <- nucDatHMR[nucDatHMR$Stop <= 294864,]
nrow(nucDatHMR) #only 9 linker regions
## [1] 9
overlappingReadsDyad <- readInfo[(readInfo$end >= min(nucDatHMR$dyad) & readInfo$start <= max(nucDatHMR$dyad)),]
dfLinkDyad <- data.frame(start = nucDatHMR$dyad[-nrow(nucDatHMR)]+1,
stop = nucDatHMR$dyad[2:nrow(nucDatHMR)])
readAvMethListDyad <- list()
for(rr in 1:nrow(overlappingReadsDyad)){
curData <- binaryCalls3[binaryCalls3$readID == overlappingReadsDyad$readID[rr],]
avMethLinkers <- c()
for(bb in 1:nrow(dfLinkDyad)){
curStart <- dfLinkDyad$start[bb]
curEnd <- dfLinkDyad$stop[bb]
curId <- curData$pos >= curStart & curData$pos <= curEnd
if(sum(curId) > 0){
curBinData <- curData[curData$pos >= curStart & curData$pos <= curEnd,]
avMethLinkers[bb] <- mean(curBinData$methylation)
names(avMethLinkers)[bb] <- nrow(curBinData) #coverage
} else {
avMethLinkers[bb] <- NA
}
}
readAvMethListDyad[[rr]] <- avMethLinkers
}
avMethMatDyad <- do.call(rbind, readAvMethListDyad)
colSums(is.na(avMethMatDyad)) # often no data in defined linker regions
## <NA> 1 37 24 23 30 38 2
## 8 8 7 9 8 8 9 8
# focus on 117 reads with data in all linker regions, looks pretty random...
readAvMethListDyadComplete <- readAvMethListDyad[rowSums(is.na(avMethMatDyad)) == 0]
rafalib::mypar(mfrow=c(2,2))
for(ll in 1:length(readAvMethListDyadComplete)){
y <- readAvMethListDyadComplete[[ll]]
if(var(y)>0){
acf(y)
}
}
rafalib::mypar(mfrow=c(1,2))
hist(unlist(readAvMethListDyadComplete), breaks=20,
xlab="Avg methylatin in linker region",
main="All reads spanning HMR")
avgMethRead <- unlist(lapply(readAvMethListDyadComplete, mean))
hist(avgMethRead, breaks=20,
xlab="Avg methylatin of read",
main="All reads spanning HMR")
# aggregate acf across all reads
acfMat <- matrix(NA, nrow=length(readAvMethListDyadComplete),
ncol=8)
rafalib::mypar(mfrow=c(1,2))
for(ll in 1:length(readAvMethListDyadComplete)){
y <- readAvMethListDyadComplete[[ll]]
if(var(y)>0){
acfMat[ll,] <- acf(y, plot=FALSE)$acf[1:8,1,1]
}
}
plot(x=0:7, y=colMeans(acfMat, na.rm=TRUE), type='h',
xlab="Lag", ylab="Average ACF",
main="Avg ACF")
abline(h=0, lty=2)
# when permuting the data
acfMatPermuted <- matrix(NA, nrow=length(readAvMethListDyadComplete),
ncol=8)
for(ll in 1:length(readAvMethListDyadComplete)){
y <- readAvMethListDyadComplete[[ll]]
if(var(y)>0){
acfMatPermuted[ll,] <- acf(sample(y), plot=FALSE)$acf[1:8,1,1]
}
}
plot(x=0:7, y=colMeans(acfMatPermuted, na.rm=TRUE), type='h',
xlab="Lag", ylab="Average ACF",
main="Permuted: Avg ACF",
ylim=c(-0.2, 1))
abline(h=0, lty=2)
### By average methylation in the read
acfMatLow <- acfMat[avgMethRead <= 0.4,]
acfMatMed <- acfMat[avgMethRead > 0.4 & avgMethRead <= 0.6,]
acfMatHigh <- acfMat[avgMethRead > 0.6,]
rafalib::mypar(mfrow=c(1,3))
plot(x=0:7, y=colMeans(acfMatLow, na.rm=TRUE), type='h',
xlab="Lag", ylab="Average ACF",
main="Low methylation")
abline(h=0, lty=2)
plot(x=0:7, y=colMeans(acfMatMed, na.rm=TRUE), type='h',
xlab="Lag", ylab="Average ACF",
main="Medium methylation")
abline(h=0, lty=2)
plot(x=0:7, y=colMeans(acfMatHigh, na.rm=TRUE), type='h',
xlab="Lag", ylab="Average ACF",
main="High methylation")
abline(h=0, lty=2)
## Heatmap of co-methylation
library(pheatmap)
avMethMat <- do.call(rbind, readAvMethListDyadComplete)
pheatmap(cor(avMethMat), cluster_rows=FALSE, cluster_cols=FALSE)
avMethMatCentered <- avMethMat - rowMeans(avMethMat)
pheatmap(cor(avMethMatCentered), cluster_rows=FALSE, cluster_cols=FALSE)
nucDat <- openxlsx::read.xlsx("../../data/HMR_HML_nucleosome positions.xlsx")
nucDatHMR <- nucDat[nucDat$region == "HMR",]
## only look at linker regions within HMR silencer regions
## HMR E is at III:292674-292769
## HMR I is at III:294805-294864
nucDatHMR <- nucDatHMR[nucDatHMR$Start >= 293200,]
nucDatHMR <- nucDatHMR[nucDatHMR$Stop <= 294864,]
nrow(nucDatHMR) #only 9 linker regions
## [1] 6
overlappingReadsDyad <- readInfo[(readInfo$end >= min(nucDatHMR$dyad) & readInfo$start <= max(nucDatHMR$dyad)),]
dfLinkDyad <- data.frame(start = nucDatHMR$dyad[-nrow(nucDatHMR)]+1,
stop = nucDatHMR$dyad[2:nrow(nucDatHMR)])
readAvMethListDyad <- list()
for(rr in 1:nrow(overlappingReadsDyad)){
curData <- binaryCalls3[binaryCalls3$readID == overlappingReadsDyad$readID[rr],]
avMethLinkers <- c()
for(bb in 1:nrow(dfLinkDyad)){
curStart <- dfLinkDyad$start[bb]
curEnd <- dfLinkDyad$stop[bb]
curId <- curData$pos >= curStart & curData$pos <= curEnd
if(sum(curId) > 0){
curBinData <- curData[curData$pos >= curStart & curData$pos <= curEnd,]
avMethLinkers[bb] <- mean(curBinData$methylation)
names(avMethLinkers)[bb] <- nrow(curBinData) #coverage
} else {
avMethLinkers[bb] <- NA
}
}
readAvMethListDyad[[rr]] <- avMethLinkers
}
avMethMatDyad <- do.call(rbind, readAvMethListDyad)
colSums(is.na(avMethMatDyad)) # often no data in defined linker regions
## 24 23 30 38 2
## 5 4 4 5 4
# focus on 117 reads with data in all linker regions, looks pretty random...
readAvMethListDyadComplete <- readAvMethListDyad[rowSums(is.na(avMethMatDyad)) == 0]
rafalib::mypar(mfrow=c(2,2))
for(ll in 1:length(readAvMethListDyadComplete)){
y <- readAvMethListDyadComplete[[ll]]
if(var(y)>0){
acf(y)
}
}
hist(unlist(readAvMethListDyadComplete), breaks=20,
xlab="Avg methylatin in linker region",
main="All reads spanning HMR")
avgMethRead <- unlist(lapply(readAvMethListDyadComplete, mean))
hist(avgMethRead, breaks=20,
xlab="Avg methylatin of read",
main="All reads spanning HMR")
# aggregate acf across all reads
acfMat <- matrix(NA, nrow=length(readAvMethListDyadComplete),
ncol=nrow(nucDatHMR)-1)
rafalib::mypar(mfrow=c(1,2))
for(ll in 1:length(readAvMethListDyadComplete)){
y <- readAvMethListDyadComplete[[ll]]
if(var(y)>0){
acfMat[ll,] <- acf(y, plot=FALSE)$acf[1:(nrow(nucDatHMR)-1),1,1]
}
}
plot(x=0:(nrow(nucDatHMR)-2), y=colMeans(acfMat, na.rm=TRUE), type='h',
xlab="Lag", ylab="Average ACF",
main="Avg ACF")
abline(h=0, lty=2)
# when permuting the data
acfMatPermuted <- matrix(NA, nrow=length(readAvMethListDyadComplete),
ncol=(nrow(nucDatHMR)-1))
for(ll in 1:length(readAvMethListDyadComplete)){
y <- readAvMethListDyadComplete[[ll]]
if(var(y)>0){
acfMatPermuted[ll,] <- acf(sample(y), plot=FALSE)$acf[1:(nrow(nucDatHMR)-1),1,1]
}
}
plot(x=0:(nrow(nucDatHMR)-2), y=colMeans(acfMatPermuted, na.rm=TRUE), type='h',
xlab="Lag", ylab="Average ACF",
main="Permuted: Avg ACF",
ylim=c(-0.2, 1))
abline(h=0, lty=2)
### By average methylation in the read
acfMatLow <- acfMat[avgMethRead <= 0.4,]
acfMatMed <- acfMat[avgMethRead > 0.4 & avgMethRead <= 0.6,]
acfMatHigh <- acfMat[avgMethRead > 0.6,]
rafalib::mypar(mfrow=c(1,3))
plot(x=0:(nrow(nucDatHMR)-2), y=colMeans(acfMatLow, na.rm=TRUE), type='h',
xlab="Lag", ylab="Average ACF",
main="Low methylation")
abline(h=0, lty=2)
plot(x=0:(nrow(nucDatHMR)-2), y=colMeans(acfMatMed, na.rm=TRUE), type='h',
xlab="Lag", ylab="Average ACF",
main="Medium methylation")
abline(h=0, lty=2)
plot(x=0:(nrow(nucDatHMR)-2), y=colMeans(acfMatHigh, na.rm=TRUE), type='h',
xlab="Lag", ylab="Average ACF",
main="High methylation")
abline(h=0, lty=2)